library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:purrr':
## 
##     flatten
# Load the Spotify streaming-history export into a tibble, then print a
# compact column-by-column overview of what was read.
spotify <- as_tibble(fromJSON("StreamingHistory0.JSON"))
glimpse(spotify)
## Rows: 7,017
## Columns: 4
## $ endTime    <chr> "2022-02-24 00:05", "2022-02-24 02:06", "2022-02-24 02:17",…
## $ artistName <chr> "Two Psychologists Four Beers", "Kamasi Washington", "Super…
## $ trackName  <chr> "Episode 81: Against Retribution", "Clair de Lune", "Vive L…
## $ msPlayed   <int> 2164222, 1270, 326, 667733, 828000, 963, 2152, 7530, 254053…
# Number of rows per artist, most frequent first.
count(spotify, artistName, sort = TRUE)
## # A tibble: 1,009 × 2
##    artistName             n
##    <chr>              <int>
##  1 The Mountain Goats   710
##  2 Very Bad Wizards     526
##  3 Unknown Artist       145
##  4 The Beatles          140
##  5 Japanese Breakfast   117
##  6 Phoebe Bridgers      102
##  7 Wednesday             99
##  8 CAKE                  94
##  9 Decoding the Gurus    89
## 10 St. Vincent           81
## # … with 999 more rows
# Entries excluded from the artist analysis: podcast-style shows and the
# "Unknown Artist" bucket.
not_music <- c(
  "Very Bad Wizards",
  "Unknown Artist",
  "Decoding the Gurus",
  "Taskmaster The Podcast",
  "Two Psychologists Four Beers",
  "Better Call Saul Insider Podcast",
  "Off Menu with Ed Gamble and James Acaster"
)

# Drop the excluded entries, then attach each artist's total number of rows
# to every row as `plays`. add_count() is used instead of
# group_by() + mutate(n()) so that no artistName grouping is left on
# `spotify` (or anything derived from it) for later steps to trip over.
spotify <- spotify %>%
  filter(!artistName %in% not_music) %>%
  add_count(artistName, name = "plays")

# Artists with at least 40 plays.
spotify_top <- spotify %>%
  filter(plays >= 40)

spotify_top %>%
  count(artistName, sort = TRUE)
## # A tibble: 21 × 2
##    artistName             n
##    <chr>              <int>
##  1 The Mountain Goats   710
##  2 The Beatles          140
##  3 Japanese Breakfast   117
##  4 Phoebe Bridgers      102
##  5 Wednesday             99
##  6 CAKE                  94
##  7 St. Vincent           81
##  8 Sleater-Kinney        77
##  9 R.E.M.                70
## 10 AJJ                   68
## # … with 11 more rows
# Timeline of every play for the top artists, one jittered point per row.
# endTime is stored as text ("YYYY-MM-DD HH:MM"); it is parsed to a date-time
# here so the x axis is a continuous time scale instead of a discrete axis
# with one level per distinct timestamp string. UTC matches the default used
# by ymd_hm() elsewhere in this script.
ggplot(
  spotify_top,
  aes(
    x = as.POSIXct(endTime, format = "%Y-%m-%d %H:%M", tz = "UTC"),
    y = artistName,
    color = artistName
  )
) +
  geom_point(position = "jitter") +
  # Keep the axis title short instead of showing the parsing expression.
  labs(x = "endTime") +
  # Artist names are already conveyed by the colour legend.
  theme(axis.text.y = element_blank())

library(lubridate)
## Loading required package: timechange
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Calendar month (1-12) of each play, taken from the parsed end timestamp.
# NOTE(review): month() drops the year, so if the export covers more than
# twelve months the same month of different years is pooled -- confirm.
spotify_top[["month"]] <- month(ymd_hm(spotify_top[["endTime"]]))

# Attach to every row the number of rows sharing its month and artist.
spotify_top <- mutate(
  group_by(spotify_top, month, artistName),
  monthly_plays = n()
)

# monthly_plays is constant within each month/artist cell, so taking the mean
# simply collapses every cell to a single row.
spotify_monthly <- aggregate(
  monthly_plays ~ month + artistName,
  data = spotify_top,
  FUN = mean
)

# One line per artist: plays per month.
ggplot(
  spotify_monthly,
  aes(x = month, y = monthly_plays, color = artistName)
) +
  geom_line()

# Narrow the data to artists with more than 80 plays overall.
# NOTE(review): the object is called "top8" -- confirm that this threshold
# really selects eight artists for the data at hand.
spotify_top8 <- filter(spotify_top, plays > 80)

# One row per month/artist. `plays` is carried through the formula so it is
# available for ordering the artists in the legend.
spotify_monthly <- aggregate(
  monthly_plays ~ month + artistName + plays,
  data = spotify_top8,
  FUN = mean
)

# Monthly plays per artist, legend ordered from most to least played.
ggplot(
  spotify_monthly,
  aes(
    x = month,
    y = monthly_plays,
    color = fct_reorder(artistName, plays, .desc = TRUE)
  )
) +
  geom_line(linewidth = 1.3)

Make it better

# Add the English month name for the axis labels. Inside mutate() the column
# is referenced directly (`month`) rather than via spotify_monthly$month.
spotify_monthly <- spotify_monthly %>%
  mutate(month_name = month.name[month])

# Monthly plays per artist, with months labelled by name (kept in calendar
# order via fct_reorder) and both colour and line width ordered by each
# artist's total plays.
ggplot(
  spotify_monthly,
  aes(
    x = fct_reorder(month_name, month),
    y = monthly_plays,
    # One line per artist.
    group = fct_reorder(artistName, plays, .desc = TRUE),
    color = fct_reorder(artistName, plays)
  )
) +
  geom_line(
    lineend = "round",
    aes(linewidth = fct_reorder(artistName, plays))
  ) +
  scale_color_viridis_d() +
  # Line width is deliberately mapped to the (ordered) artist factor. Naming
  # the ordinal scale explicitly states that intent and avoids the
  # "Using linewidth for a discrete variable is not advised" warning that the
  # default discrete linewidth scale emits.
  scale_linewidth_ordinal() +
  theme_bw() +
  labs(
    title = "Listening Trends for Ben's top artists (2022)",
    x = "Month",
    y = "Plays",
    color = "Artist",
    linewidth = "Artist"
  )

# Add the English month name for the axis labels (harmless if the column
# already exists). Inside mutate() the column is referenced directly
# (`month`) rather than via spotify_monthly$month.
spotify_monthly <- spotify_monthly %>%
  mutate(month_name = month.name[month])

# Final version of the trend plot: reversed legend order, a different viridis
# palette, a classic theme, and the legend placed inside the panel at the
# top-right corner.
ggplot(
  spotify_monthly,
  aes(
    x = fct_reorder(month_name, month),
    y = monthly_plays,
    # One line per artist.
    group = fct_reorder(artistName, plays, .desc = TRUE),
    color = fct_reorder(artistName, plays)
  )
) +
  geom_line(
    lineend = "round",
    aes(linewidth = fct_reorder(artistName, plays))
  ) +
  # Reverse both legends identically so they still merge into one.
  guides(
    color = guide_legend(reverse = TRUE),
    linewidth = guide_legend(reverse = TRUE)
  ) +
  scale_color_viridis_d(option = "F", direction = 1) +
  # Line width is deliberately mapped to the (ordered) artist factor. Naming
  # the ordinal scale explicitly states that intent and avoids the
  # "Using linewidth for a discrete variable is not advised" warning that the
  # default discrete linewidth scale emits.
  scale_linewidth_ordinal() +
  theme_classic() +
  theme(
    legend.position = c(.95, .95),
    legend.justification = c("right", "top"),
    legend.box.just = "right",
    legend.margin = margin(6, 6, 6, 6)
  ) +
  labs(
    title = "Listening Trends for Ben's top artists (2022)",
    x = "Month",
    y = "Plays",
    color = "Ben's top artists",
    linewidth = "Ben's top artists"
  )

Mapping the geography of my music

# Rows per artist, keeping only artists with more than 11 rows and dropping
# two further named entries.
artist_plays <- spotify %>%
  count(artistName) %>%
  filter(
    n > 11,
    !artistName %in% c("You're Wrong About", "My Brother, My Brother And Me")
  )

[This step comes after all the function-making and script-writing we just did.]

# Credit for the degree-minute-second to decimal conversion:
# https://www.r-bloggers.com/2022/02/degree-minute-second-to-decimal-coordinates/
# dms2dec() is not defined in this section and must already be available.

# Lookup tables: artist -> origin, and origin -> coordinates.
artist_towns <- read.csv("data/artist_towns.csv")
origin_coords <- read.csv("data/origin_coords.csv")

# Attach an origin and its coordinates to each artist; rows without a
# longitude are dropped.
artist_map <- artist_plays %>%
  inner_join(artist_towns, by = "artistName") %>%
  inner_join(origin_coords, by = "origin") %>%
  filter(!is.na(long))

# Convert both coordinates with dms2dec() into new *.dec columns.
artist_map[["long.dec"]] <- dms2dec(artist_map[["long"]])
artist_map[["lat.dec"]] <- dms2dec(artist_map[["lat"]])
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
# Outline data for the world map (columns include long, lat and group).
world <- map_data("world")

# First look at artist origins on a world map.
# - geom_polygon() needs group = group; without it every outline is joined
#   into a single path and the map is drawn as a tangle of stray lines.
# - The polygons are drawn before the points, and in a light fill, so the
#   points are not hidden underneath the land masses.
ggplot(artist_map, aes(x = long.dec, y = lat.dec)) +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey90"
  ) +
  geom_point()

# Static map of artist origins: light grey land, one translucent point per
# artist sized by that artist's row count (n), no axes and no legend.
# geom_point() has no `label` aesthetic, so it is not mapped here; mapping it
# only produced an "Ignoring unknown aesthetics: label" warning.
ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey90"
  ) +
  geom_point(
    data = artist_map,
    aes(x = long.dec, y = lat.dec, size = n),
    color = "skyblue3",
    alpha = .7
  ) +
  theme_void() +
  theme(legend.position = "none")

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Same map, stored as an object so it can be converted to an interactive
# plotly widget. The `label` aesthetic is unknown to geom_point() (hence the
# warning below) but is the aesthetic that the tooltip argument refers to.
music_map <- ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey90"
  ) +
  geom_point(
    data = artist_map,
    aes(x = long.dec, y = (lat.dec), size = n, label = artistName),
    color = "skyblue3",
    alpha = .7
  ) +
  theme_void() +
  theme(legend.position = "none")
## Warning in geom_point(data = artist_map, aes(x = long.dec, y = (lat.dec), :
## Ignoring unknown aesthetics: label

# Interactive version; hovering a point shows only the `label` aesthetic.
ggplotly(p = music_map, tooltip = "label")